####################################
#Diff-in-diff with matching and dual-distance cutoffs - PA figure, dd with dd 
#Traffic and Political Attitudes
#Connor Jerzak & Brian Libgober
#76/276/70, 476: HAS ez-passes  
#79, 80, 81, : NO EZPass 
####################################

########################
#Initial setup 
########################
#set wd
setwd("~/Dropbox/Collaborations/Traffic/project_analysis")

#external libraries 
library(ggplot2)

#load in data 
NJ_data <- read.csv("NJ_precinct_master.csv", header=T)
PA_data <- read.csv("PA_precinct_master.csv", header=T)
colnames(PA_data)[which(colnames(PA_data)=="EZPlaza_TravelDistOptimizedDist_PA_Total_Leng")] <- "EZPlaza_TravelDistOptimizedDist_Total_Leng"
colnames(NJ_data)[which(colnames(NJ_data)=="EZPlaza_TravelDistOptimizedDist_NJ_Total_Leng")] <- "EZPlaza_TravelDistOptimizedDist_Total_Leng"

colnames(NJ_data)[which(colnames(NJ_data)=="USP_00")] <- "USPP2000"
colnames(NJ_data)[which(colnames(NJ_data)=="USP_04")] <- "USPP2004"

#outcomes 
outcome_time1_name <- "USPP2000"
outcome_time2_name <- "USPP2004"  #US president (dem) percent 2000, 2004

#identify matching variables 
#matching_variables <- c("cepctwhite", "cepctblack", "cepctwomen", "cnipoverty", "cnpopurban") #income, commute length, number of cars, etc.
matching_variables <- c("census_2000_pblack", "census_2000_pfam_belowpoverty", "census_2000_ppop_over65") 

#treated basis (something having to do with E-ZPasses)
treatment_basis_name <- "EZPlaza_TravelDistOptimizedDist_Total_Leng"

#control basis (something having to do with non-E-ZPass highway)
control_basis_name <- c("minNetworkOptimizedDistance_Distance_nonEZ_hwy")

#other variables to include in the regression
fixed_effect_names <- c("COUNTYFP10", 
                        "STATEFP10")

NJ_data_reduced <- NJ_data[, c(treatment_basis_name, control_basis_name, fixed_effect_names, 
                               matching_variables, outcome_time1_name, outcome_time2_name) ]
PA_data_reduced <- PA_data[, c(treatment_basis_name, control_basis_name, fixed_effect_names, 
                               matching_variables, outcome_time1_name, outcome_time2_name) ]

my_data <- rbind(NJ_data_reduced, PA_data_reduced)
use_index1 <- which( (!(is.na(my_data$COUNTYFP10))) & (my_data$STATEFP10==34) )  
use_index2 <- which( (!(is.na(my_data$COUNTYFP10))) & (my_data$STATEFP10==42) )  
my_data[use_index1,]$COUNTYFP10 <- paste("state1_", my_data[use_index1, ]$COUNTYFP10, sep="")
my_data[use_index2,]$COUNTYFP10 <- paste("state2_", my_data[use_index2,]$COUNTYFP10, sep="") 

my_data <- na.omit(my_data)


##############################
#loop to generate figures
##############################
##############################
max_length_out <- 5; min_length_out <- 5;
#max_seq <- seq(250, 3000, length.out = max_length_out)#crow
#min_seq <- seq(250, 38000, length.out = min_length_out)#crow
max_seq <- seq(2, 7.25, length.out = max_length_out)# max_seq <- seq(5, 14, length.out = max_length_out)
min_seq <- seq(7, 15, length.out = min_length_out)# min_seq <- seq(14, 36.7, length.out = min_length_out)

source("./causal_effect_estimation/dd_with_linearSDs_function.R")
BIG_STORAGE_LIST <- list()
counter1 <- 0 
for(j in min_seq)
{ 
  counter1 <- counter1 + 1 
  results_list <- list(); counter2 <- 0; for(i in max_seq)
  { 
    counter2 <- counter2 + 1
    print(counter2)
    results_list[[counter2]] <- dd_with_linearSDs(my_data=my_data,
                                                  treatment_basis_name=treatment_basis_name, 
                                                  control_basis_name=control_basis_name, 
                                                  outcome_time1_name=outcome_time1_name, 
                                                  outcome_time2_name=outcome_time2_name,
                                                  treatment_basis_treated_max=i, 
                                                  treatment_basis_control_min=j, 
                                                  control_basis_control_max=i,
                                                  type="binary_controls",
                                                  fixed_effect_names = fixed_effect_names,  
                                                  control_basis_treated_min=j,
                                                  start_browser=F, 
                                                  match=T)
  }
  BIG_STORAGE_LIST[[counter1]] <- results_list
}

####################
#save and plot results
####################
#save results as appropriate objects 
results_list <- unlist(BIG_STORAGE_LIST, recursive=F, use.names=T)
match_name <- "linearSDs_PA"
index_seq <- seq(1, max_length_out*min_length_out)
for(index in 1:min_length_out)
{
  start <- max_length_out*(index-1)+1
  end <- max_length_out*(index)
  
  internal_list <- results_list[start:end]
  
  #save results as appropriate objects 
  unlisted_results <- unlist(internal_list, recursive=T, use.names=T) #$base_result
  base_results <- unlisted_results[which(names(unlisted_results)=="lm_summary.Estimate")]
  SDs <- unlisted_results[which(names(unlisted_results)=="lm_summary.Std. Error")]
  
  #setup for ggplot 2 
  gg_data <- data.frame(cbind(max_seq, base_results, base_results-1.96*SDs, base_results+1.96*SDs))
  colnames(gg_data)[ncol(gg_data)-1] <- "lower"
  colnames(gg_data)[ncol(gg_data)] <- "upper"
  
  g <- ggplot(gg_data, aes(x=max_seq, y=base_results)) + 
    geom_point() + 
    geom_errorbar(aes(ymin=lower, ymax=upper), width=0.2) + 
    geom_hline(yintercept = 0, line_type="dashed") + 
    geom_errorbar(aes(ymin=lower, ymax=upper), width=0.2) + 
    ylab("DiD effect (in % points)") + 
    xlab("Maximum distance from E-ZPass or Control Highway") + 
    ggtitle("Maximum Distance vs. DiD Effect (in % points)") + 
    theme(text = element_text(size=13)) 
  ggsave(g, file=paste(match_name, sprintf("%i.png", index), sep=""))
}




